 #include "llama.cpp/llama.h"
 #include "llama.cpp/server/server.h"
 #include "llamafile/bestline.h"
+#include "llamafile/chatbot.h"
 #include "llamafile/compute.h"
 #include "llamafile/highlight.h"
 #include "llamafile/llama.h"
 #include "llamafile/llamafile.h"
 #include "llamafile/string.h"
 
-#define RESET "\e[0m"
-#define BOLD "\e[1m"
-#define FAINT "\e[2m"
-#define UNBOLD "\e[22m"
-#define RED "\e[31m"
-#define GREEN "\e[32m"
-#define MAGENTA "\e[35m"
-#define YELLOW "\e[33m"
-#define CYAN "\e[36m"
-#define UNFOREGROUND "\e[39m"
-#define BRIGHT_BLACK "\e[90m"
-#define BRIGHT_RED "\e[91m"
-#define BRIGHT_GREEN "\e[92m"
-#define CLEAR_FORWARD "\e[K"
-
 enum Role {
     ROLE_USER,
     ROLE_ASSISTANT,
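The escape-sequence macros deleted above are used by the help and prompt code that this commit moves out of this file; presumably they now live behind the new `llamafile/chatbot.h` include, though that header is not shown in this diff. For readers unfamiliar with them, here is a minimal, illustrative sketch of how such ANSI SGR color macros are defined and used (note that `\e` is a GNU extension; `\033` is the portable spelling):

```cpp
// Illustrative only -- not the contents of llamafile/chatbot.h.
#include <cstdio>

#define RESET "\033[0m"   // restore default terminal attributes
#define BOLD  "\033[1m"   // bold weight
#define GREEN "\033[32m"  // green foreground

int main() {
    // color a status word, then reset so later output is unaffected
    std::fprintf(stderr, BOLD GREEN "ok" RESET " model loaded\n");
}
```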
@@ -188,111 +174,10 @@ static std::string describe_position(llama_pos pos) {
     }
     if (pos > 0)
         description = std::string("...") + description;
-    return lf::collapse(description);
-}
-
-static void on_help(const std::vector<std::string> &args) {
-    if (args.size() == 1) {
-        fprintf(stderr, "\
-" BOLD "available commands" RESET "\n\
-  /clear            restart conversation\n\
-  /context          print context window usage\n\
-  /dump [FILE]      print or save context window to file\n\
-  /exit             end program\n\
-  /help [COMMAND]   show help\n\
-  /manual [on|off]  toggle manual role mode\n\
-  /pop              restore context window size\n\
-  /push             push context window size to stack\n\
-  /stack            prints context window stack\n\
-  /stats            print performance metrics\n\
-  /undo             erases last message in conversation\n\
-");
-    } else if (args[1] == "context") {
-        fprintf(stderr, "\
-usage: /context" RESET "\n\
-prints information about context window usage. this helps you know how\n\
-soon you're going to run out of tokens for the current conversation.\n\
-");
-    } else if (args[1] == "dump") {
-        fprintf(stderr, "\
-" BOLD "usage: /dump [FILE]" RESET "\n\
-dumps raw tokens for current conversation history. special tokens are\n\
-printed in the a model specific chat syntax. this is useful for seeing\n\
-specifically what data is being evaluated by the model. by default it\n\
-will be printed to the terminal. if a FILE argument is specified, then\n\
-the raw conversation history will be written to that filename.\n\
-");
-    } else if (args[1] == "exit") {
-        fprintf(stderr, "\
-" BOLD "usage: /exit" RESET "\n\
-this command will cause the process to exit. it is essentially the same\n\
-as typing ctrl-d which signals an eof condition. it also does the same\n\
-thing as typing ctrl-c when the >>> user input prompt is displayed.\n\
-");
-    } else if (args[1] == "manual") {
-        fprintf(stderr, "\
-" BOLD "usage: /manual [on|off]" RESET "\n\
-puts the chatbot in manual mode. this is useful if you want to inject\n\
-a response as the model rather than the user. it's also possible to add\n\
-additional system prompts to the conversation history. when the manual\n\
-mode is activated, a hint is displayed next to the '>>>' indicating\n\
-the current role, which can be 'user', 'assistant', or 'system'. if\n\
-enter is pressed on an empty line, then llamafile will cycle between\n\
-all three roles. when /manual is specified without an argument, it will\n\
-toggle manual mode. otherwise an 'on' or 'off' argument is supplied.\n\
-");
-    } else if (args[1] == "help") {
-        fprintf(stderr, "\
-" BOLD "usage: /help [COMMAND]" RESET "\n\
-shows help on how to issue commands to your llamafile. if no argument is\n\
-specified, then a synopsis of all available commands will be printed. if\n\
-a specific command name is given (e.g. /help dump) then documentation on\n\
-the usage of that specific command will be printed.\n\
-");
-    } else if (args[1] == "stats") {
-        fprintf(stderr, "\
-" BOLD "usage: /stats" RESET "\n\
-prints performance statistics for current session. this includes prompt\n\
-evaluation time in tokens per second, which indicates prefill speed, or\n\
-how quickly llamafile is able to read text. the 'eval time' statistic\n\
-gives you prediction or token generation speed, in tokens per second,\n\
-which tells you how quickly llamafile is able to write text.\n\
-");
-    } else if (args[1] == "clear") {
-        fprintf(stderr, "\
-usage: /clear" RESET "\n\
-start conversation over from the beginning. this command adjusts the\n\
-context window to what it was after the initial system prompt. this\n\
-command also erases the /push stack.\n\
-");
-    } else if (args[1] == "push") {
-        fprintf(stderr, "\
-usage: /push" RESET "\n\
-save current size of context window to stack. this command may be used\n\
-with /pop to backtrack a conversation.\n\
-");
-    } else if (args[1] == "pop") {
-        fprintf(stderr, "\
-usage: /pop" RESET "\n\
-restores size of context window from stack. this command may be used\n\
-with /push to backtrack a conversation.\n\
-");
-    } else if (args[1] == "stack") {
-        fprintf(stderr, "\
-usage: /stack" RESET "\n\
-prints the current conversation stack, created by /push commands.\n\
-the stack consists of token offsets within the context window.\n\
-");
-    } else if (args[1] == "undo") {
-        fprintf(stderr, "\
-usage: /undo" RESET "\n\
-erases last exchange in conversation. in the normal mode, this includes\n\
-what the assistant last said, as well as the question that was asked. in\n\
-manual mode, this will erase only the last chat message.\n\
-");
-    } else {
-        fprintf(stderr, BRIGHT_RED "%s: unknown command" RESET "\n", args[1].c_str());
-    }
+    description = lf::collapse(description);
+    if (!pos && description.empty())
+        description = "<absolute beginning>";
+    return description;
 }
 
 static void on_manual(const std::vector<std::string> &args) {
@@ -323,6 +208,7 @@ static void on_clear(const std::vector<std::string> &args) {
     llama_kv_cache_seq_rm(g_ctx, 0, g_system_prompt_tokens, tokens_used() - g_system_prompt_tokens);
     g_history.resize(g_system_prompt_tokens);
     g_stack.clear();
+    fix_stacks();
 }
 
 static void print_stack(void) {
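As the removed help text puts it, /clear resets the context window to what it was right after the initial system prompt and erases the /push stack; the new `fix_stacks()` call (declared elsewhere, presumably in `llamafile/chatbot.h`) appears to keep the /push–/pop bookkeeping consistent after that reset. For background only, here is a minimal sketch of the general "rewind to a saved offset" pattern against the upstream llama.cpp API; `rewind_to` and the `history` vector are hypothetical names, not llamafile's code (llama_kv_cache_seq_rm evicts cached tokens whose positions fall in [p0, p1), with a negative p1 meaning "to the end"):

```cpp
// Sketch of the general rewind pattern, not this file's implementation.
#include <vector>
#include "llama.cpp/llama.h"

static void rewind_to(llama_context *ctx, std::vector<llama_token> &history, llama_pos keep) {
    // drop every cached token at position >= keep for sequence 0
    llama_kv_cache_seq_rm(ctx, /*seq_id=*/0, /*p0=*/keep, /*p1=*/-1);
    history.resize(keep);  // keep the token log in step with the KV cache
}
```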
@@ -426,6 +312,11 @@ static void on_completion(const char *line, int pos, bestlineCompletions *comp)
 
 // handle irc style commands like: `/arg0 arg1 arg2`
 static bool handle_command(const char *command) {
+    if (!strcmp(command, "/?")) {
+        const std::vector<std::string> args = {"?"};
+        on_help(args);
+        return true;
+    }
     if (!(command[0] == '/' && std::isalpha(command[1])))
         return false;
     std::vector<std::string> args;
@@ -435,7 +326,7 @@ static bool handle_command(const char *command) {
         args.push_back(arg);
     if (args[0] == "exit" || args[0] == "bye") {
         exit(0);
-    } else if (args[0] == "help" || args[0] == "?") {
+    } else if (args[0] == "help") {
        on_help(args);
    } else if (args[0] == "stats") {
        on_stats(args);
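The new early check is needed because the generic parser only accepts commands whose first character after the slash is alphabetic, so `/?` could never reach the old `args[0] == "?"` branch; the commit handles the alias up front and drops the dead comparison. For reference, a self-contained sketch of the irc-style tokenization the comment above describes (`parse_command` is a hypothetical name; the real code accumulates `args` inline):

```cpp
// Sketch of splitting "/arg0 arg1 arg2" into arguments. Illustrative only.
#include <cctype>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> parse_command(const std::string &command) {
    std::vector<std::string> args;
    if (command.size() < 2 || command[0] != '/' || !std::isalpha((unsigned char)command[1]))
        return args;  // not a command; caller treats the line as chat input
    std::istringstream iss(command.substr(1));  // drop the leading '/'
    std::string arg;
    while (iss >> arg)  // whitespace-delimited arguments
        args.push_back(arg);
    return args;
}

int main() {
    for (const std::string &a : parse_command("/manual on"))
        std::cout << a << '\n';  // prints "manual" then "on"
}
```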
@@ -671,11 +562,11 @@ int chatbot_main(int argc, char **argv) {
             free(line);
             continue;
         }
+        g_said_something = true;
         if (handle_command(line)) {
             free(line);
             continue;
         }
-        g_said_something = true;
         bool add_assi = !g_manual_mode;
         std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
         std::string msg = llama_chat_apply_template(g_model, params.chat_template, chat, add_assi);
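Once a line survives command handling, it is wrapped in the model's chat template before evaluation; `add_assi` appends the assistant-turn prefix so the model answers next, and it is suppressed in manual mode, where the user picks roles explicitly. A sketch of that templating step, assuming the llama.cpp common-library helper called above (`wrap_user_turn` and the include path are illustrative, not part of this diff):

```cpp
// Sketch of the templating step using llama_chat_apply_template(model, tmpl,
// messages, add_assistant) from llama.cpp's common library.
#include <string>
#include <vector>
#include "llama.cpp/common.h"   // assumed location of llama_chat_msg + helper

std::string wrap_user_turn(llama_model *model, const std::string &line) {
    std::vector<llama_chat_msg> chat = {{"user", line}};
    // an empty template string typically falls back to the template
    // embedded in the model's GGUF metadata
    return llama_chat_apply_template(model, "", chat, /*add_assistant=*/true);
    // e.g. "<|im_start|>user\nhello<|im_end|>\n<|im_start|>assistant\n"
    // for a ChatML-style template; the exact text depends on the model
}
```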